; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE-P9

; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9

; Shuffle of two <16 x i8> vectors that each carry a single byte loaded from
; memory (from %a and %b respectively) in element 0. Result elements 0-1 are
; taken from elements 0-1 of the first vector, result elements 2-9 from
; elements 0-7 of the second vector, and result elements 10-15 are undef.
define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_v16i8_v16i8:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lbz r3, 0(r3)
; CHECK-LE-P8-NEXT:    lbz r4, 0(r4)
; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v16i8:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-LE-P9-NEXT:    lxsibzx v3, 0, r4
; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
; CHECK-BE-P8-NEXT:    lbz r4, 0(r4)
; CHECK-BE-P8-NEXT:    lbz r3, 0(r3)
; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI0_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
; CHECK-BE-P8-NEXT:    vperm v2, v4, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
; CHECK-BE-P9-NEXT:    lxsibzx v2, 0, r4
; CHECK-BE-P9-NEXT:    lxsibzx f1, 0, r3
; CHECK-BE-P9-NEXT:    addi r5, r5, .LCPI0_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs0, 0(r5)
; CHECK-BE-P9-NEXT:    xxperm v2, vs1, vs0
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C0(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lbz r4, 0(r4)
; CHECK-AIX-64-P8-NEXT:    lbz r3, 0(r3)
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    ld r5, L..C0(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r4
; CHECK-AIX-64-P9-NEXT:    lxsibzx f1, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r5)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C0(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lbz r4, 0(r4)
; CHECK-AIX-32-P8-NEXT:    lbz r3, 0(r3)
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v3, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lwz r5, L..C0(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxsibzx f1, 0, r3
; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r5)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First shuffle operand: the byte at %a placed in element 0 of a poison vector.
  %0 = load <1 x i8>, ptr %a, align 4
  %bc1 = bitcast <1 x i8> %0 to i8
  %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0
  ; Second shuffle operand: the byte at %b placed in element 0 of an undef vector.
  %1 = load <1 x i8>, ptr %b, align 8
  %bc2 = bitcast <1 x i8> %1 to i8
  %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0
  ; Both bitcasts below are same-type (<16 x i8> to <16 x i8>) and leave the
  ; values unchanged.
  %2 = bitcast <16 x i8> %vecinit3 to <16 x i8>
  %3 = bitcast <16 x i8> %vecinit6 to <16 x i8>
  ; Mask indices 0-15 select from %2 and 16-31 select from %3.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Inserts the scalar i8 %b into element 0 of the incoming vector %a and returns
; the updated vector. No shufflevector appears in the IR for this case.
define <16 x i8> @test_v16i8_none(<16 x i8> %a, i8 %b) {
; CHECK-LE-P8-LABEL: test_v16i8_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-LE-P8-NEXT:    mtvsrd v4, r5
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI1_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v16i8_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtvsrwz v3, r5
; CHECK-LE-P9-NEXT:    vinsertb v2, v3, 15
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v16i8_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r5
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI1_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v16i8_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrwz v3, r5
; CHECK-BE-P9-NEXT:    vinsertb v2, v3, 0
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C1(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-64-P9-NEXT:    vinsertb v2, v3, 0
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C1(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v16i8_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-32-P9-NEXT:    vinsertb v2, v3, 0
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Only element 0 is replaced; the other fifteen elements of %a pass through.
  %vecins = insertelement <16 x i8> %a, i8 %b, i32 0
  ret <16 x i8> %vecins
}

; Shuffle whose first operand is a full <16 x i8> loaded from %b and whose
; second operand holds the scalar i8 %arg in element 0 of an undef vector.
; Result elements 0-1 come from elements 0-1 of the loaded vector, result
; elements 2-9 from elements 0-7 of the scalar-built vector, and result
; elements 10-15 are undef.
define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v16i8:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v16i8:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v16i8:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v16i8:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrwz v2, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI2_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C2(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C1(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v16i8:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxv v2, 0(r4)
; CHECK-AIX-32-P9-NEXT:    stb r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First operand is an ordinary 16-byte vector load (declared 4-byte aligned).
  %lhs = load <16 x i8>, ptr %b, align 4
  ; Second operand defines only element 0; the remaining elements are undef.
  %rhs = insertelement <16 x i8> undef, i8 %arg, i32 0
  ; Mask indices 0-15 select from %lhs and 16-31 select from %rhs.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle of two scalar-built vectors of different element widths. The first
; operand holds i8 %arg1 in element 0 of a <16 x i8>; the second holds
; i16 %arg in element 0 of a <8 x i16> that is bitcast to <16 x i8>. Result
; elements 0-1 come from the first operand, result elements 2-9 from elements
; 0-7 of the second operand, and result elements 10-15 are undef.
define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v16i8_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r4
; CHECK-LE-P8-NEXT:    mtfprd f1, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r4
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    mtfprd f0, r3
; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -32
; CHECK-AIX-32-P8-NEXT:    stb r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v16i8_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stb r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First operand: byte scalar in element 0 of a byte vector.
  %lhs = insertelement <16 x i8> undef, i8 %arg1, i32 0
  ; Second operand: halfword scalar in element 0 of a halfword vector, then
  ; reinterpreted as sixteen bytes so both shuffle operands share a type.
  %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0
  %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8>
  ; Mask indices 0-15 select from %lhs and 16-31 select from %rhs.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Same shuffle mask as @test_v16i8_v8i16 but with the operand roles swapped:
; the first operand holds i16 %arg in element 0 of a <8 x i16> bitcast to
; <16 x i8>, and the second holds i8 %arg1 in element 0 of a <16 x i8>.
; Result elements 0-1 come from the first operand, result elements 2-9 from
; elements 0-7 of the second operand, and result elements 10-15 are undef.
define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v8i16_v16i8:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r4
; CHECK-LE-P8-NEXT:    mtfprd f1, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r4
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    mtfprd f0, r3
; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
; CHECK-BE-P8-NEXT:    sldi r3, r3, 48
; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
; CHECK-BE-P9-NEXT:    sldi r3, r3, 48
; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -32
; CHECK-AIX-32-P8-NEXT:    stb r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v16i8:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stb r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Second shuffle operand: byte scalar in element 0 of a byte vector.
  %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0
  ; First shuffle operand: halfword scalar in element 0 of a halfword vector,
  ; reinterpreted as sixteen bytes.
  %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0
  %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8>
  ; Mask indices 0-15 select from %lhs and 16-31 select from %rhs.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle whose first operand is a full <16 x i8> loaded from %b and whose
; second operand holds i16 %arg in element 0 of a <8 x i16> bitcast to
; <16 x i8>. Result elements 0-1 come from elements 0-1 of the loaded vector,
; result elements 2-9 from elements 0-7 of the scalar-built vector, and result
; elements 10-15 are undef.
define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-LE-P8-NEXT:    mtvsrd v4, r3
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-LE-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrwz v2, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
; CHECK-BE-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C3(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C2(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r4)
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxv v2, 0(r4)
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; First operand is an ordinary 16-byte vector load (declared 4-byte aligned).
  %lhs = load <16 x i8>, ptr %b, align 4
  ; Second operand: halfword scalar in element 0 of a halfword vector,
  ; reinterpreted as sixteen bytes.
  %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0
  %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8>
  ; Mask indices 0-15 select from %lhs and 16-31 select from %rhs.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Inserts the scalar i16 %b into element 0 of the incoming vector %a and
; returns the updated vector. No shufflevector appears in the IR for this case.
define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) {
; CHECK-LE-P8-LABEL: test_v8i16_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
; CHECK-LE-P8-NEXT:    mtvsrd v4, r5
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI6_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtvsrwz v3, r5
; CHECK-LE-P9-NEXT:    vinserth v2, v3, 14
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI6_0@toc@ha
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r5
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI6_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrwz v3, r5
; CHECK-BE-P9-NEXT:    vinserth v2, v3, 0
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C4(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-64-P9-NEXT:    vinserth v2, v3, 0
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r4, L..C2(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-32-P9-NEXT:    vinserth v2, v3, 0
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Only element 0 is replaced; the other seven elements of %a pass through.
  %vecins = insertelement <8 x i16> %a, i16 %b, i32 0
  ret <8 x i16> %vecins
}

; Shuffle of a <16 x i8> (vector %a with i8 %arg inserted at element 0) and a
; <4 x i32> (vector %b with i32 %arg1 inserted at element 0) bitcast to
; <16 x i8>. The mask defines only two result elements: element 0 is element 0
; of the first operand and element 1 is element 0 of the second operand; result
; elements 2-15 are undef.
define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) {
; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r3
; CHECK-LE-P9-NEXT:    mtvsrws v3, r4
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
; CHECK-BE-P8-NEXT:    sldi r4, r4, 32
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
; CHECK-BE-P9-NEXT:    mtvsrws v3, r4
; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r4
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v16i8_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stb r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Unlike the sibling tests, the insert bases here are the vector arguments
  ; %a and %b rather than undef. The shuffle mask reads only element 0 of each
  ; operand, which is exactly the element overwritten by the inserts.
  %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0
  ; Same-type bitcast (<16 x i8> to <16 x i8>); leaves the value unchanged.
  %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8>
  %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0
  %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8>
  ; Mask index 0 selects %lhs element 0; mask index 16 selects %rhs element 0.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle whose first operand holds i32 %arg in element 0 of a <4 x i32>
; bitcast to <16 x i8>, and whose second operand holds i8 %arg1 in element 0
; of a <16 x i8>. Result elements 0-1 come from elements 0-1 of the first
; operand, result elements 2-9 from elements 0-7 of the second operand, and
; result elements 10-15 are undef.
define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r4
; CHECK-LE-P8-NEXT:    mtfprd f1, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r4
; CHECK-LE-P9-NEXT:    mtvsrws v3, r3
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
; CHECK-BE-P9-NEXT:    mtvsrws v3, r3
; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r3
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -32
; CHECK-AIX-32-P8-NEXT:    stb r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v16i8:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stb r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Second shuffle operand: byte scalar in element 0 of a byte vector.
  %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0
  ; First shuffle operand: word scalar in element 0 of a word vector,
  ; reinterpreted as sixteen bytes.
  %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0
  %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8>
  ; Mask indices 0-15 select from %lhs and 16-31 select from %rhs.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Truncates the i64 argument %b to i32 and inserts that same value into
; elements 1 and 3 of the incoming vector %a; elements 0 and 2 of %a are
; returned unchanged. No shufflevector appears in the IR for this case.
define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) {
; CHECK-LE-P8-LABEL: test_none_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-LE-P8-NEXT:    mtvsrwz v4, r5
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    addis r3, r2, .LCPI9_1@toc@ha
; CHECK-LE-P8-NEXT:    addi r3, r3, .LCPI9_1@toc@l
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    xxswapd v3, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprwz f0, r5
; CHECK-LE-P9-NEXT:    xxinsertw v2, vs0, 8
; CHECK-LE-P9-NEXT:    xxinsertw v2, vs0, 0
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI9_0@toc@ha
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r5
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI9_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    addis r3, r2, .LCPI9_1@toc@ha
; CHECK-BE-P8-NEXT:    addi r3, r3, .LCPI9_1@toc@l
; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-BE-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtfprwz f0, r5
; CHECK-BE-P9-NEXT:    xxinsertw v2, vs0, 4
; CHECK-BE-P9-NEXT:    xxinsertw v2, vs0, 12
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r4, L..C5(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT:    ld r3, L..C6(r2) # %const.1
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtfprwz f0, r3
; CHECK-AIX-64-P9-NEXT:    xxinsertw v2, vs0, 4
; CHECK-AIX-64-P9-NEXT:    xxinsertw v2, vs0, 12
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C3(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C4(r2) # %const.1
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v4, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    mtfprwz f0, r4
; CHECK-AIX-32-P9-NEXT:    xxinsertw v2, vs0, 4
; CHECK-AIX-32-P9-NEXT:    xxinsertw v2, vs0, 12
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Keep only the low 32 bits of %b.
  %conv = trunc i64 %b to i32
  ; Two chained inserts of the same scalar, into elements 1 and 3.
  %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1
  %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3
  ret <4 x i32> %vecins2
}

; Byte shuffle of two vectors that each have only lane 0 defined:
;   LHS - <4 x i32> whose lane 0 is the 4 bytes loaded from %a
;   RHS - <16 x i8> whose lane 0 is the single byte loaded from %b
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_v4i32_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI10_0@toc@ha
; CHECK-LE-P8-NEXT:    lbzx r4, 0, r4
; CHECK-LE-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI10_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    mtvsrwz v2, r4
; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsiwzx v2, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
; CHECK-LE-P9-NEXT:    lxsibzx v3, 0, r4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI10_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    vspltb v3, v3, 7
; CHECK-LE-P9-NEXT:    xxperm v2, v3, vs0
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lbzx r4, 0, r4
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI10_0@toc@ha
; CHECK-BE-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
; CHECK-BE-P8-NEXT:    addi r4, r5, .LCPI10_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r4
; CHECK-BE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-BE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI10_0@toc@ha
; CHECK-BE-P9-NEXT:    lxsibzx v2, 0, r4
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI10_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    vspltb v2, v2, 7
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lbzx r4, 0, r4
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C7(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-64-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-64-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C3(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r4
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lbzx r4, 0, r4
; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C5(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-32-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C1(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: the 4 bytes at %a, reinterpreted as one i32, go into lane 0.
  %0 = load <4 x i8>, ptr %a, align 4
  %bc1 = bitcast <4 x i8> %0 to i32
  %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0
  ; RHS: the single byte at %b goes into lane 0.
  %1 = load <1 x i8>, ptr %b, align 8
  %bc2 = bitcast <1 x i8> %1 to i8
  %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0
  %2 = bitcast <4 x i32> %vecinit3 to <16 x i8>
  ; Same-type bitcast: leaves the RHS value unchanged.
  %3 = bitcast <16 x i8> %vecinit6 to <16 x i8>
  ; Mask indices 0-15 select from %2 and 16-31 from %3: result bytes 0-1 are
  ; LHS bytes 0-1, result bytes 2-9 are RHS bytes 0-7, the rest are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle that interleaves the first byte of two vectors:
;   LHS - <16 x i8> %a with scalar %arg written to lane 0
;   RHS - <2 x i64> %b with scalar %arg1 written to lane 0
; Only result bytes 0 and 1 are defined by the mask.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) {
; CHECK-LE-P8-LABEL: test_v16i8_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r3
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    mtfprd f0, r4
; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r3, r3, 56
; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v16i8_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r3, r3, 56
; CHECK-BE-P9-NEXT:    mtvsrd v3, r4
; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v16i8_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 56
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v16i8_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 56
; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r4
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    stb r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v16i8_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stb r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: overwrite lane 0 of %a with %arg (same-type bitcast changes nothing).
  %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0
  %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8>
  ; RHS: overwrite lane 0 of %b with %arg1, then view it as 16 bytes.
  %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0
  %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8>
  ; Result byte 0 = LHS byte 0, result byte 1 = RHS byte 0 (mask index 16);
  ; every other result byte is undef, so lanes 1+ of %a and %b are never read.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle of two vectors that each have only lane 0 defined:
;   LHS - <2 x i64> whose lane 0 is %arg
;   RHS - <16 x i8> whose lane 0 is %arg1
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) {
; CHECK-LE-P8-LABEL: test_v2i64_v16i8:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r4
; CHECK-LE-P8-NEXT:    mtfprd f1, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r4
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    mtfprd f0, r3
; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
; CHECK-LE-P9-NEXT:    vmrglh v2, v2, v3
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r4, r4, 56
; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
; CHECK-BE-P8-NEXT:    mtvsrd v2, r4
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v16i8:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r4, r4, 56
; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
; CHECK-BE-P9-NEXT:    mtvsrd v2, r4
; CHECK-BE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r4, r4, 56
; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r4
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    stb r5, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v16i8:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stb r5, -32(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; RHS is built first: %arg1 in lane 0 of an otherwise-undef <16 x i8>.
  %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0
  ; LHS: %arg in lane 0 of an otherwise-undef <2 x i64>, viewed as 16 bytes.
  %lhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0
  %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8>
  ; Mask indices 0-15 select from %lhs and 16-31 from %rhs: result bytes 0-1
  ; are LHS bytes 0-1, result bytes 2-9 are RHS bytes 0-7, the rest are undef.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle of two vectors that each have only lane 0 defined:
;   LHS - <16 x i8> whose lane 0 is the single byte loaded from %a
;   RHS - <8 x i16> whose lane 0 is the 2 bytes loaded from %b
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr {
; CHECK-LE-P8-LABEL: test_1_2:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI13_0@toc@ha
; CHECK-LE-P8-NEXT:    lbzx r3, 0, r3
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI13_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    mtvsrwz v2, r3
; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_1_2:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI13_0@toc@ha
; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI13_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    vspltb v2, v2, 7
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_1_2:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lbzx r3, 0, r3
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
; CHECK-BE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_1_2:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-BE-P9-NEXT:    vspltb v2, v2, 7
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_1_2:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lbzx r3, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-64-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_1_2:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-AIX-64-P9-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_1_2:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lbzx r3, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C6(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_1_2:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C2(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    vspltb v3, v2, 7
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, v3, vs0
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: the single byte at %a goes into lane 0.
  %0 = load <1 x i8>, ptr %a, align 4
  %bc1 = bitcast <1 x i8> %0 to i8
  %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0
  ; RHS: the 2 bytes at %b, reinterpreted as one i16, go into lane 0.
  %1 = load <2 x i8>, ptr %b, align 8
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  ; Same-type bitcast: leaves the LHS value unchanged.
  %2 = bitcast <16 x i8> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Mask indices 0-15 select from %2 and 16-31 from %3: result bytes 0-1 are
  ; LHS bytes 0-1, result bytes 2-9 are RHS bytes 0-7, the rest are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle of two vectors that each have only lane 0 defined:
;   LHS - <16 x i8> whose lane 0 is the single byte loaded from %a
;   RHS - <8 x i16> whose lane 0 is the 2 bytes loaded from %b
; NOTE(review): the IR body is line-for-line identical to @test_1_2 and never
; builds a <2 x i64>, despite the function name. Confirm whether the RHS was
; meant to carry an i64 lane (compare @test_v2i64_none) before relying on
; this test for v2i64 coverage.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_none_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI14_0@toc@ha
; CHECK-LE-P8-NEXT:    lbzx r3, 0, r3
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI14_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    mtvsrwz v2, r3
; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_none_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI14_0@toc@ha
; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI14_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-LE-P9-NEXT:    vspltb v2, v2, 7
; CHECK-LE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_none_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lbzx r3, 0, r3
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    mtvsrwz v2, r3
; CHECK-BE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_none_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-BE-P9-NEXT:    vspltb v2, v2, 7
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lbzx r3, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-64-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-AIX-64-P9-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lbzx r3, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C7(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsibzx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C3(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    vspltb v3, v2, 7
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, v3, vs0
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: the single byte at %a goes into lane 0.
  %0 = load <1 x i8>, ptr %a, align 4
  %bc1 = bitcast <1 x i8> %0 to i8
  %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0
  ; RHS: the 2 bytes at %b, reinterpreted as one i16, go into lane 0.
  %1 = load <2 x i8>, ptr %b, align 8
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  ; Same-type bitcast: leaves the LHS value unchanged.
  %2 = bitcast <16 x i8> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Mask indices 0-15 select from %2 and 16-31 from %3: result bytes 0-1 are
  ; LHS bytes 0-1, result bytes 2-9 are RHS bytes 0-7, the rest are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle of two vectors that each have only lane 0 defined:
;   LHS - <2 x i64> whose lane 0 is the 8 bytes loaded from %a
;   RHS - <16 x i8> whose lane 0 is the single byte loaded from %b
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v2i64_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_v2i64_none:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI15_0@toc@ha
; CHECK-LE-P8-NEXT:    lbzx r4, 0, r4
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-LE-P8-NEXT:    addi r5, r5, .LCPI15_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs0, 0, r5
; CHECK-LE-P8-NEXT:    mtvsrwz v2, r4
; CHECK-LE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-LE-P8-NEXT:    xxswapd v4, vs0
; CHECK-LE-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_none:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI15_0@toc@ha
; CHECK-LE-P9-NEXT:    lxsibzx v3, 0, r4
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI15_0@toc@l
; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    vspltb v3, v3, 7
; CHECK-LE-P9-NEXT:    xxperm v2, v3, vs0
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_none:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lbzx r4, 0, r4
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
; CHECK-BE-P8-NEXT:    vspltb v2, v2, 7
; CHECK-BE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_none:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsibzx v3, 0, r4
; CHECK-BE-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-BE-P9-NEXT:    vspltb v3, v3, 7
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lbzx r4, 0, r4
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r3
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-64-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsibzx v3, 0, r4
; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    vspltb v3, v3, 7
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lbzx r4, 0, r4
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P8-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-32-P8-NEXT:    xxspltw v3, vs0, 1
; CHECK-AIX-32-P8-NEXT:    vspltb v2, v2, 7
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxsibzx v3, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxvwsx v2, 0, r3
; CHECK-AIX-32-P9-NEXT:    vspltb v3, v3, 7
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: the 8 bytes at %a, reinterpreted as one i64, go into lane 0.
  %0 = load <8 x i8>, ptr %a, align 4
  %bc1 = bitcast <8 x i8> %0 to i64
  %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0
  ; RHS: the single byte at %b goes into lane 0.
  %1 = load <1 x i8>, ptr %b, align 8
  %bc2 = bitcast <1 x i8> %1 to i8
  %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0
  %2 = bitcast <2 x i64> %vecinit3 to <16 x i8>
  ; Same-type bitcast: leaves the RHS value unchanged.
  %3 = bitcast <16 x i8> %vecinit6 to <16 x i8>
  ; Mask indices 0-15 select from %2 and 16-31 from %3: result bytes 0-1 are
  ; LHS bytes 0-1, result bytes 2-9 are RHS bytes 0-7, the rest are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle of two <8 x i16> vectors that each have only lane 0 defined:
;   LHS - lane 0 is %arg
;   RHS - lane 0 is %arg1 (built first in the IR body)
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) {
; CHECK-LE-P8-LABEL: test_v8i16_v8i16rhs:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtvsrd v2, r3
; CHECK-LE-P8-NEXT:    mtvsrd v3, r4
; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v8i16rhs:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtvsrd v2, r3
; CHECK-LE-P9-NEXT:    mtvsrd v3, r4
; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v8i16rhs:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI16_0@toc@ha
; CHECK-BE-P8-NEXT:    mtvsrwz v3, r4
; CHECK-BE-P8-NEXT:    addi r5, r5, .LCPI16_0@toc@l
; CHECK-BE-P8-NEXT:    mtvsrwz v4, r3
; CHECK-BE-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-BE-P8-NEXT:    vperm v2, v4, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v8i16rhs:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    addis r5, r2, .LCPI16_0@toc@ha
; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
; CHECK-BE-P9-NEXT:    mtfprwz f1, r3
; CHECK-BE-P9-NEXT:    addi r5, r5, .LCPI16_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs0, 0(r5)
; CHECK-BE-P9-NEXT:    xxperm v2, vs1, vs0
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C8(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v4, r3
; CHECK-AIX-64-P8-NEXT:    lxvw4x v3, 0, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    ld r5, L..C4(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-64-P9-NEXT:    mtfprwz f1, r3
; CHECK-AIX-64-P9-NEXT:    lxv vs0, 0(r5)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs1, vs0
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    sth r4, -16(r1)
; CHECK-AIX-32-P8-NEXT:    sth r3, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16rhs:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    sth r4, -16(r1)
; CHECK-AIX-32-P9-NEXT:    sth r3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; RHS is built first: %arg1 in lane 0 of an otherwise-undef <8 x i16>.
  %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0
  %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8>
  ; LHS: %arg in lane 0 of an otherwise-undef <8 x i16>.
  %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0
  %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8>
  ; Mask indices 0-15 select from %lhs and 16-31 from %rhs: result bytes 0-1
  ; are LHS bytes 0-1, result bytes 2-9 are RHS bytes 0-7, the rest are undef.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle that interleaves the first byte of two vectors:
;   LHS - <8 x i16> %a with scalar %arg written to lane 0
;   RHS - <4 x i32> %b with scalar %arg1 written to lane 0
; Only result bytes 0 and 1 are defined by the mask.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) {
; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r7
; CHECK-LE-P8-NEXT:    mtfprd f1, r8
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r7
; CHECK-LE-P9-NEXT:    mtvsrws v3, r8
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r3, r7, 48
; CHECK-BE-P8-NEXT:    sldi r4, r8, 32
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r3, r7, 48
; CHECK-BE-P9-NEXT:    mtvsrws v3, r8
; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 32
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P9-NEXT:    mtvsrws v3, r4
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: overwrite lane 0 of %a with %arg, then view it as 16 bytes.
  %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0
  %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8>
  ; RHS: overwrite lane 0 of %b with %arg1, then view it as 16 bytes.
  %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0
  %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8>
  ; Result byte 0 = LHS byte 0, result byte 1 = RHS byte 0 (mask index 16);
  ; every other result byte is undef, so lanes 1+ of %a and %b are never read.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle that interleaves the first byte of two vectors:
;   LHS - <8 x i16> %a with scalar %arg written to lane 0
;   RHS - <2 x i64> %b with scalar %arg1 written to lane 0
; Only result bytes 0 and 1 are defined by the mask.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) {
; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r7
; CHECK-LE-P8-NEXT:    mtfprd f1, r8
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r7
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    mtfprd f0, r8
; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
; CHECK-LE-P9-NEXT:    vmrglb v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r3, r7, 48
; CHECK-BE-P8-NEXT:    mtvsrd v3, r8
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    sldi r3, r7, 48
; CHECK-BE-P9-NEXT:    mtvsrd v3, r8
; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
; CHECK-BE-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    sldi r3, r3, 48
; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r4
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    sth r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghb v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: overwrite lane 0 of %a with %arg, then view it as 16 bytes.
  %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0
  %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8>
  ; RHS: overwrite lane 0 of %b with %arg1, then view it as 16 bytes.
  %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0
  %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8>
  ; Result byte 0 = LHS byte 0, result byte 1 = RHS byte 0 (mask index 16);
  ; every other result byte is undef, so lanes 1+ of %a and %b are never read.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Byte shuffle that concatenates i32 lane 0 of two vectors:
;   LHS - <4 x i32> %a with scalar %arg written to lane 0
;   RHS - <4 x i32> %b with scalar %arg1 written to lane 0
; Only result bytes 0-7 are defined by the mask.
; The assertion lines below are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v4i32_v4i32(i32 %arg, i32 %arg1, <4 x i32> %a, <4 x i32> %b) {
; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprwz f0, r3
; CHECK-LE-P8-NEXT:    mtfprwz f1, r4
; CHECK-LE-P8-NEXT:    xxmrghw v2, vs1, vs0
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprwz f0, r3
; CHECK-LE-P9-NEXT:    mtfprwz f1, r4
; CHECK-LE-P9-NEXT:    xxmrghw v2, vs1, vs0
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    mtvsrwz v2, r4
; CHECK-BE-P8-NEXT:    mtvsrwz v3, r3
; CHECK-BE-P8-NEXT:    vmrgow v2, v3, v2
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrwz v2, r4
; CHECK-BE-P9-NEXT:    mtvsrwz v3, r3
; CHECK-BE-P9-NEXT:    vmrgow v2, v3, v2
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-64-P8-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-64-P8-NEXT:    vmrgow v2, v3, v2
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v2, r4
; CHECK-AIX-64-P9-NEXT:    mtvsrwz v3, r3
; CHECK-AIX-64-P9-NEXT:    vmrgow v2, v3, v2
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    stw r4, -16(r1)
; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stw r4, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; LHS: overwrite lane 0 of %a with %arg, then view it as 16 bytes.
  %lhs.tmp = insertelement <4 x i32> %a, i32 %arg, i32 0
  %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8>
  ; RHS: overwrite lane 0 of %b with %arg1, then view it as 16 bytes.
  %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0
  %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8>
  ; Result bytes 0-3 = LHS bytes 0-3 and result bytes 4-7 = RHS bytes 0-3
  ; (mask indices 16-19), i.e. the bytes of each operand's i32 lane 0; the
  ; rest are undef, so lanes 1+ of %a and %b are never read.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle of a <4 x i32> and an <8 x i16> that were each built by inserting a
; scalar argument into element 0 of an undef vector, viewed as <16 x i8>.
;   %arg  -> element 0 of the <4 x i32> (left shuffle operand)
;   %arg1 -> element 0 of the <8 x i16> (right shuffle operand)
; Every expectation below ends in a single halfword merge (vmrglh on the
; little-endian runs, vmrghh on the big-endian and AIX runs); the 32-bit AIX
; runs first spill the scalars to stack slots and reload them as vectors.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r4
; CHECK-LE-P9-NEXT:    mtvsrws v2, r3
; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    sldi r3, r3, 32
; CHECK-BE-P8-NEXT:    sldi r4, r4, 48
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    mtvsrd v3, r4
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrws v2, r3
; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    sldi r3, r3, 32
; CHECK-AIX-64-P8-NEXT:    sldi r4, r4, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r4
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrws v2, r3
; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r5, r1, -16
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r5
; CHECK-AIX-32-P8-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    sth r4, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Both operands start from undef, so only element 0 of each vector carries a
  ; defined value.
  %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0
  %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8>
  %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0
  %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8>
  ; Result bytes 0-1 are bytes 0-1 of %lhs and result bytes 2-9 are bytes 0-7
  ; of %rhs (mask indices 16-23). Only bytes 0-1 of %rhs hold the inserted i16;
  ; the rest of %rhs is undef, as are the last six result bytes.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle of two vectors whose element 0 comes from memory, viewed as
; <16 x i8>.
;   %a: eight bytes are loaded, bitcast to i64 and inserted into element 0 of
;       a poison <2 x i64> (left shuffle operand).
;   %b: two bytes are loaded, bitcast to i16 and inserted into element 0 of an
;       undef <8 x i16> (right shuffle operand).
; NOTE(review): the function name suggests both operands are <2 x i64>, but
; the right operand is built as <8 x i16> from a two-byte load. The expected
; halfword merge on the 64-bit runs matches the IR as written, so this looks
; like a naming quirk rather than an IR mistake - confirm before renaming or
; changing the types.
; The 64-bit runs expect two scalar doubleword loads and a vmrghh; the 32-bit
; AIX runs expect a permute (vperm / xxperm) driven by a mask loaded through
; the TOC.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-LE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-LE-P8-NEXT:    vmrghh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-LE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-LE-P9-NEXT:    vmrghh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-BE-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    lxsdx v2, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r3)
; CHECK-AIX-64-P9-NEXT:    lxsd v3, 0(r4)
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r5, L..C8(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v3, 0, r4
; CHECK-AIX-32-P8-NEXT:    xxspltw v2, vs0, 1
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lxvwsx vs0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C4(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxsiwzx v2, 0, r4
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Left operand: eight bytes from %a (declared align 4) become element 0 of a
  ; <2 x i64>; element 1 stays poison.
  %0 = load <8 x i8>, ptr %a, align 4
  %bc1 = bitcast <8 x i8> %0 to i64
  %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0
  ; Right operand: two bytes from %b (declared align 8) become element 0 of an
  ; <8 x i16>; the other seven elements stay undef.
  %1 = load <2 x i8>, ptr %b, align 8
  %bc2 = bitcast <2 x i8> %1 to i16
  %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
  %2 = bitcast <2 x i64> %vecinit3 to <16 x i8>
  %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
  ; Result bytes 0-1 are bytes 0-1 of the left operand and result bytes 2-9
  ; are bytes 0-7 of the right operand (mask indices 16-23), of which only
  ; bytes 0-1 hold loaded data. The last six result bytes are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle of a <2 x i64> and a <4 x i32> that each had a scalar argument
; inserted into element 0, viewed as <16 x i8>.
;   %arg  (i64) -> element 0 of %a (left shuffle operand)
;   %arg1 (i32) -> element 0 of %b (right shuffle operand)
; Every expectation below ends in a single word merge: xxmrglw on the
; little-endian runs (after xxswapd), xxmrghw on the big-endian and AIX runs.
; The 32-bit AIX runs store r3 and r5 to stack slots and reload them as
; vectors before merging.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32> %b) {
; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P8-NEXT:    xxswapd vs1, vs1
; CHECK-LE-P8-NEXT:    xxmrglw v2, vs1, vs0
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r3
; CHECK-LE-P9-NEXT:    mtvsrws vs1, r4
; CHECK-LE-P9-NEXT:    xxswapd vs0, vs0
; CHECK-LE-P9-NEXT:    xxmrglw v2, vs1, vs0
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    mtfprd f0, r3
; CHECK-BE-P8-NEXT:    sldi r3, r4, 32
; CHECK-BE-P8-NEXT:    mtfprd f1, r3
; CHECK-BE-P8-NEXT:    xxmrghw v2, vs0, vs1
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrws vs1, r4
; CHECK-BE-P9-NEXT:    mtfprd f0, r3
; CHECK-BE-P9-NEXT:    xxmrghw v2, vs0, vs1
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    mtfprd f0, r3
; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 32
; CHECK-AIX-64-P8-NEXT:    mtfprd f1, r3
; CHECK-AIX-64-P8-NEXT:    xxmrghw v2, vs0, vs1
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrws vs1, r4
; CHECK-AIX-64-P9-NEXT:    mtfprd f0, r3
; CHECK-AIX-64-P9-NEXT:    xxmrghw v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
; CHECK-AIX-32-P8-NEXT:    xxmrghw v2, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stw r5, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs0, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -32(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs1, vs0
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Overwrite element 0 of each incoming vector with a scalar argument, then
  ; reinterpret both vectors as sixteen bytes.
  %lhs.tmp = insertelement <2 x i64> %a, i64 %arg, i32 0
  %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8>
  %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0
  %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8>
  ; Result bytes 0-3 are bytes 0-3 of %lhs (half of the inserted i64), result
  ; bytes 4-7 are bytes 0-3 of %rhs (mask indices 16-19); the upper eight
  ; result bytes are undef.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle of a <2 x i64> and an <8 x i16> that were each built by inserting a
; scalar argument into element 0 of an undef vector, viewed as <16 x i8>.
;   %arg  (i64) -> element 0 of the <2 x i64> (left shuffle operand)
;   %arg1 (i16) -> element 0 of the <8 x i16> (right shuffle operand)
; Every expectation below ends in a single halfword merge (vmrglh on the
; little-endian runs, vmrghh on the big-endian and AIX runs). The 32-bit AIX
; runs store r3 (word) and r5 (halfword) to stack slots and reload them as
; vectors before merging.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) {
; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    mtfprd f0, r3
; CHECK-LE-P8-NEXT:    mtfprd f1, r4
; CHECK-LE-P8-NEXT:    xxswapd v2, vs0
; CHECK-LE-P8-NEXT:    xxswapd v3, vs1
; CHECK-LE-P8-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    mtfprd f0, r3
; CHECK-LE-P9-NEXT:    xxswapd v2, vs0
; CHECK-LE-P9-NEXT:    mtfprd f0, r4
; CHECK-LE-P9-NEXT:    xxswapd v3, vs0
; CHECK-LE-P9-NEXT:    vmrglh v2, v3, v2
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    mtvsrd v2, r3
; CHECK-BE-P8-NEXT:    sldi r3, r4, 48
; CHECK-BE-P8-NEXT:    mtvsrd v3, r3
; CHECK-BE-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    mtvsrd v2, r3
; CHECK-BE-P9-NEXT:    sldi r3, r4, 48
; CHECK-BE-P9-NEXT:    mtvsrd v3, r3
; CHECK-BE-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P8-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P8-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    mtvsrd v2, r3
; CHECK-AIX-64-P9-NEXT:    sldi r3, r4, 48
; CHECK-AIX-64-P9-NEXT:    mtvsrd v3, r3
; CHECK-AIX-64-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -16
; CHECK-AIX-32-P8-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -32
; CHECK-AIX-32-P8-NEXT:    lxvw4x v2, 0, r4
; CHECK-AIX-32-P8-NEXT:    sth r5, -32(r1)
; CHECK-AIX-32-P8-NEXT:    lxvw4x v3, 0, r3
; CHECK-AIX-32-P8-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    sth r5, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v2, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lxv v3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    vmrghh v2, v2, v3
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Both operands start from undef, so only element 0 of each vector carries a
  ; defined value.
  %lhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0
  %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8>
  %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0
  %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8>
  ; Result bytes 0-1 are bytes 0-1 of %lhs and result bytes 2-9 are bytes 0-7
  ; of %rhs (mask indices 16-23). Only bytes 0-1 of %rhs hold the inserted i16;
  ; the rest of %rhs is undef, as are the last six result bytes.
  %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}

; Shuffle of a <4 x i32> and a <2 x i64> whose element 0 comes from memory,
; viewed as <16 x i8>.
;   %a: four bytes are loaded, bitcast to i32 and inserted into element 0 of a
;       poison <4 x i32> (left shuffle operand).
;   %b: eight bytes are loaded, bitcast to i64 and inserted into element 0 of
;       an undef <2 x i64> (right shuffle operand).
; The mask takes two bytes from the left operand followed by all eight bytes
; of the right operand's element 0. Every expectation below ends in a general
; permute (vperm on the pwr8 runs, xxperm on the pwr9 runs) whose control
; vector is loaded from the constant pool.
; The assertions are autogenerated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
; CHECK-LE-P8:       # %bb.0: # %entry
; CHECK-LE-P8-NEXT:    addis r5, r2, .LCPI24_0@toc@ha
; CHECK-LE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P8-NEXT:    lfdx f1, 0, r4
; CHECK-LE-P8-NEXT:    addi r3, r5, .LCPI24_0@toc@l
; CHECK-LE-P8-NEXT:    lxvd2x vs2, 0, r3
; CHECK-LE-P8-NEXT:    xxswapd v2, f0
; CHECK-LE-P8-NEXT:    xxswapd v3, f1
; CHECK-LE-P8-NEXT:    xxswapd v4, vs2
; CHECK-LE-P8-NEXT:    vperm v2, v3, v2, v4
; CHECK-LE-P8-NEXT:    blr
;
; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
; CHECK-LE-P9:       # %bb.0: # %entry
; CHECK-LE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-LE-P9-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
; CHECK-LE-P9-NEXT:    addi r3, r3, .LCPI24_0@toc@l
; CHECK-LE-P9-NEXT:    xxswapd v2, f0
; CHECK-LE-P9-NEXT:    lfd f0, 0(r4)
; CHECK-LE-P9-NEXT:    xxswapd v3, f0
; CHECK-LE-P9-NEXT:    lxv vs0, 0(r3)
; CHECK-LE-P9-NEXT:    xxperm v2, v3, vs0
; CHECK-LE-P9-NEXT:    blr
;
; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
; CHECK-BE-P8:       # %bb.0: # %entry
; CHECK-BE-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P8-NEXT:    addis r5, r2, .LCPI24_0@toc@ha
; CHECK-BE-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-BE-P8-NEXT:    addi r3, r5, .LCPI24_0@toc@l
; CHECK-BE-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-BE-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-BE-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-BE-P8-NEXT:    blr
;
; CHECK-BE-P9-LABEL: test_v4i32_v2i64:
; CHECK-BE-P9:       # %bb.0: # %entry
; CHECK-BE-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-BE-P9-NEXT:    addis r3, r2, .LCPI24_0@toc@ha
; CHECK-BE-P9-NEXT:    lxsd v2, 0(r4)
; CHECK-BE-P9-NEXT:    addi r3, r3, .LCPI24_0@toc@l
; CHECK-BE-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-BE-P9-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-BE-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-BE-P9-NEXT:    blr
;
; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P8:       # %bb.0: # %entry
; CHECK-AIX-64-P8-NEXT:    ld r5, L..C9(r2) # %const.0
; CHECK-AIX-64-P8-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P8-NEXT:    lxsdx v3, 0, r4
; CHECK-AIX-64-P8-NEXT:    xxsldwi v2, f0, f0, 1
; CHECK-AIX-64-P8-NEXT:    lxvw4x v4, 0, r5
; CHECK-AIX-64-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-64-P8-NEXT:    blr
;
; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-64-P9:       # %bb.0: # %entry
; CHECK-AIX-64-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-64-P9-NEXT:    ld r3, L..C5(r2) # %const.0
; CHECK-AIX-64-P9-NEXT:    lxsd v2, 0(r4)
; CHECK-AIX-64-P9-NEXT:    xxsldwi vs0, f0, f0, 1
; CHECK-AIX-64-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-64-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-64-P9-NEXT:    blr
;
; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P8:       # %bb.0: # %entry
; CHECK-AIX-32-P8-NEXT:    lwz r5, 4(r4)
; CHECK-AIX-32-P8-NEXT:    lxsiwzx v2, 0, r3
; CHECK-AIX-32-P8-NEXT:    stw r5, -16(r1)
; CHECK-AIX-32-P8-NEXT:    lwz r3, 0(r4)
; CHECK-AIX-32-P8-NEXT:    addi r4, r1, -32
; CHECK-AIX-32-P8-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P8-NEXT:    addi r3, r1, -16
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs0, 0, r3
; CHECK-AIX-32-P8-NEXT:    lwz r3, L..C9(r2) # %const.0
; CHECK-AIX-32-P8-NEXT:    lxvw4x vs1, 0, r4
; CHECK-AIX-32-P8-NEXT:    lxvw4x v4, 0, r3
; CHECK-AIX-32-P8-NEXT:    xxmrghw v3, vs1, vs0
; CHECK-AIX-32-P8-NEXT:    vperm v2, v2, v3, v4
; CHECK-AIX-32-P8-NEXT:    blr
;
; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
; CHECK-AIX-32-P9:       # %bb.0: # %entry
; CHECK-AIX-32-P9-NEXT:    lfiwzx f0, 0, r3
; CHECK-AIX-32-P9-NEXT:    lwz r3, 4(r4)
; CHECK-AIX-32-P9-NEXT:    stw r3, -16(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, 0(r4)
; CHECK-AIX-32-P9-NEXT:    lxv vs1, -16(r1)
; CHECK-AIX-32-P9-NEXT:    stw r3, -32(r1)
; CHECK-AIX-32-P9-NEXT:    lwz r3, L..C5(r2) # %const.0
; CHECK-AIX-32-P9-NEXT:    lxv vs2, -32(r1)
; CHECK-AIX-32-P9-NEXT:    xxmrghw v2, vs2, vs1
; CHECK-AIX-32-P9-NEXT:    lxv vs1, 0(r3)
; CHECK-AIX-32-P9-NEXT:    xxperm v2, vs0, vs1
; CHECK-AIX-32-P9-NEXT:    blr
entry:
  ; Left operand: four bytes from %a become element 0 of a <4 x i32>; the
  ; other three elements stay poison.
  %0 = load <4 x i8>, ptr %a, align 4
  %bc1 = bitcast <4 x i8> %0 to i32
  %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0
  ; Right operand: eight bytes from %b become element 0 of a <2 x i64>;
  ; element 1 stays undef.
  %1 = load <8 x i8>, ptr %b, align 8
  %bc2 = bitcast <8 x i8> %1 to i64
  %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0
  %2 = bitcast <4 x i32> %vecinit3 to <16 x i8>
  %3 = bitcast <2 x i64> %vecinit6 to <16 x i8>
  ; Result bytes 0-1 are bytes 0-1 of the left operand and result bytes 2-9
  ; are bytes 0-7 of the right operand (mask indices 16-23), all of which hold
  ; loaded data here. The last six result bytes are undef.
  %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <16 x i8> %shuffle
}
